In the following all deconvolution tools for spaceDeconv are tested and run once to showcase and test all tools.
library(reticulate)
library(TENxVisiumData) # data
library(TabulaMurisSenisData) # data
library(ggspavis) # visualization
library(SPOTlight)
# setup python
reticulate::use_condaenv("cell2loc_env")
scanpy <- reticulate::import("scanpy")
reticulate::py_available()
## [1] TRUE
We are using two sample datasets from mouse kidneys.
# Spatial
sp <- TENxVisiumData::MouseKidneyCoronal()
rownames(sp) <- rowData(sp)$symbol # overwrite EnsemblID
# Single Cell
sc <- TabulaMurisSenisData::TabulaMurisSenisDroplet(tissues= "Kidney")$Kidney
Let’s have a look inside the datasets. The console output shows the available cell types and the corresponding number of available cells. The Plot visualizes the number of gene counts per spot and the corresponding Visium Image.
| 1m | 3m | 18m | 21m | 24m | 30m | |
|---|---|---|---|---|---|---|
| CD45 | 11 | 32 | 76 | 54 | 1010 | 106 |
| CD45 B cell | 7 | 5 | 45 | 54 | 2322 | 62 |
| CD45 NK cell | 1 | 4 | 8 | 2 | 47 | 4 |
| CD45 T cell | 8 | 18 | 48 | 42 | 846 | 177 |
| CD45 macrophage | 59 | 132 | 254 | 101 | 259 | 514 |
| CD45 plasma cell | 0 | 0 | 9 | 12 | 169 | 42 |
| Epcam kidney distal convoluted tubule epithelial cell | 78 | 126 | 169 | 153 | 0 | 131 |
| Epcam brush cell | 52 | 15 | 78 | 169 | 0 | 31 |
| Epcam kidney collecting duct principal cell | 77 | 110 | 132 | 58 | 0 | 370 |
| Epcam kidney proximal convoluted tubule epithelial cell | 945 | 684 | 1120 | 868 | 0 | 817 |
| Epcam podocyte | 92 | 94 | 64 | 66 | 0 | 170 |
| Epcam proximal tube epithelial cell | 25 | 195 | 296 | 5 | 0 | 1977 |
| Epcam thick ascending tube S epithelial cell | 465 | 312 | 248 | 228 | 0 | 313 |
| Pecam Kidney cortex artery cell | 75 | 78 | 91 | 69 | 0 | 115 |
| Pecam fenestrated capillary endothelial | 164 | 182 | 164 | 134 | 0 | 211 |
| Pecam kidney capillary endothelial cell | 49 | 38 | 25 | 18 | 0 | 7 |
| Stroma fibroblast | 15 | 16 | 37 | 13 | 0 | 80 |
| Stroma kidney mesangial cell | 80 | 51 | 18 | 22 | 0 | 7 |
| nan | 285 | 238 | 256 | 189 | 1068 | 579 |
We subset the cells to remove unsuitable cells and NAs. We further subset cells for performance reasons. In a full run this step will be skipped.
This section will further include normalization steps.
# We are only using data from 18m old cells
sc <- sc[, sc$age=="18m"]
sc <- sc[, !sc$free_annotation %in% c("nan", "CD45")] # remove false data
# downsampling for performance reasons, copied from MarcElosua/SPOTlight
idx <- split(seq(ncol(sc)), sc$free_annotation)
n_cells <- 50
cs_keep <- lapply(idx, function(i) {
n <- length(i)
if (n < n_cells)
n_cells <- n
sample(i, n_cells)
})
sc <- sc[, unlist(cs_keep)]
Marker Genes are necessary for SPOTlight. Here i follow the guideline presented on their website. This could be done differently. A vector of highly variable Genes is also helpful but not required.
# SPOTligth parameters
slot <- "cell_ontology_class" # where are the cell types stored, find better term for "slot"
mgs <- NULL # DataFrame of scored Marker genes
# + obviously the single cell and spatial dataset
colLabels(sc) <- colData(sc)[[slot]] # add Labels
# norm
sc <- scuttle::logNormCounts(sc) # score marksers
mgs <- scran::scoreMarkers(sc) # can include a subset step, requires logcounts slot/assay?
# this code is from the vignette
# one could perform this differently but the final dataframe needs to be present
mgs_fil <- lapply(names(mgs), function(i) {
x <- mgs[[i]]
# Filter and keep relevant marker genes, those with AUC > 0.8
x <- x[x$mean.AUC > 0.8, ]
# Sort the genes from highest to lowest weight
x <- x[order(x$mean.AUC, decreasing = TRUE), ]
# Add gene and cluster id to the dataframe
x$gene <- rownames(x)
x$cluster <- i
data.frame(x)
})
mgs_df <- do.call(rbind, mgs_fil)
deconvolution <- SPOTlight::SPOTlight(sc, sp, mgs=mgs_df, weight_id = "mean.AUC", verbose=TRUE)
## Warning in .set_groups_if_null(x): Grouping cells into celltypes
## by SingleCellExperiment::colLabels(x)
## Scaling count matrix
## Seeding initial matrices
## Training NMF model
## Time for training: 6.71min
## Deconvoluting mixture data
deconvolution <- deconvolution$mat # extract the result
knitr::kable(head(deconvolution[, 4:6])) # just a subset
| fenestrated cell | fibroblast | kidney capillary endothelial cell | |
|---|---|---|---|
| AAACCGTTCGTCCAGG-1 | 0.0868149 | 0.0521567 | 0.0528951 |
| AAACCTAAGCAGCCGG-1 | 0.0000000 | 0.0000000 | 0.0000000 |
| AAACGAGACGGTTGAT-1 | 0.0000000 | 0.0000000 | 0.0000000 |
| AAACGGTTGCGAACTG-1 | 0.0000000 | 0.0000000 | 0.0000000 |
| AAACTCGGTTCGCAAT-1 | 0.0631507 | 0.0472160 | 0.0589676 |
| AAACTGCTGGCTCCAA-1 | 0.0654315 | 0.0835577 | 0.1990505 |
# adding the results to colData
colnames(deconvolution) <- make.names(colnames(deconvolution))
for (i in colnames(deconvolution)){ # each separately
sp[[i]] <- deconvolution[, i]
}
# the colData annotation of the spatialExperiment now looks like this:
names(colData(sp))
## [1] "sample_id"
## [2] "nCounts"
## [3] "B.cell"
## [4] "brush.cell"
## [5] "epithelial.cell.of.proximal.tubule"
## [6] "fenestrated.cell"
## [7] "fibroblast"
## [8] "kidney.capillary.endothelial.cell"
## [9] "kidney.collecting.duct.principal.cell"
## [10] "kidney.cortex.artery.cell"
## [11] "kidney.distal.convoluted.tubule.epithelial.cell"
## [12] "kidney.loop.of.Henle.thick.ascending.limb.epithelial.cell"
## [13] "kidney.mesangial.cell"
## [14] "kidney.proximal.convoluted.tubule.epithelial.cell"
## [15] "macrophage"
## [16] "NK.cell"
## [17] "plasma.cell"
## [18] "podocyte"
## [19] "T.cell"
library(spacexr)
sc2 <- sc # using a copy in this case because we are removing cells
# first subset the sc reference, every cell type needs to be present more than 25 times!!!!!
cellTable <- as.data.frame(table(sc2[["cell_ontology_class"]])) # table of all factors and their count
cellTable <- cellTable[cellTable$Freq>=25,] # drop all below 25
cellsToKeep <- as.character(cellTable$Var1) # character vector for subsetting
sc2 <- sc2[, sc2[["cell_ontology_class"]] %in% cellsToKeep] # subset cells (columns)
sc2[["cell_ontology_class"]] <- droplevels(sc2[["cell_ontology_class"]]) # drop zero levels in factor
# counts as dgC
counts <- as(counts(sc2), "dgCMatrix") # convert from delayed matrix to dgC
# named cell type factor
cell_types <- colData(sc2)[, "cell_ontology_class"] # get cell type factor
names(cell_types) <- rownames(colData(sc2)) # name it!
cell_types <- as.factor(cell_types) # ensure factor
# named Umi Count
nUMI <- colData(sc2)[, "n_counts"]
names(nUMI) <- rownames(colData(sc2)) # name it!
# create Reference
reference <- spacexr::Reference(counts, cell_types, nUMI)
coords <- SpatialExperiment::spatialCoords(sp) # spatial coordinates
coords <- as.data.frame(coords)
counts <- counts(sp) # counts
nUMI <- colData(sp)[, "nCounts"]
# all three above are already named but that does not have to be the case
# in this specific case we must make gene names unique! There are duplicates
rownames(counts) <- make.names(rownames(counts), unique=T)
puck <- spacexr::SpatialRNA(coords, counts, nUMI)
RCTDObject <- spacexr::create.RCTD(puck, reference, max_cores = 3, UMI_min = 0) # NOTE UMI_min = 0!!!!
##
## B cell
## 43
## T cell
## 48
## brush cell
## 50
## epithelial cell of proximal tubule
## 49
## fenestrated cell
## 50
## fibroblast
## 36
## kidney collecting duct principal cell
## 49
## kidney cortex artery cell
## 45
## kidney distal convoluted tubule epithelial cell
## 50
## kidney loop of Henle thick ascending limb epithelial cell
## 49
## kidney mesangial cell
## 28
## kidney proximal convoluted tubule epithelial cell
## 50
## macrophage
## 51
## podocyte
## 53
RCTDObject <- spacexr::run.RCTD(RCTDObject)
## [1] "gather_results: finished 1000"
results <- RCTDObject@results
norm_weights <- spacexr::normalize_weights(results$weights) # normalize the weights to sum to one
norm_weights <- as.matrix(norm_weights)